library(tidyverse)

# SRA run table ------
# Load the run table downloaded from SRA (delimiter is guessed by readr).
sra.meta <- read_delim('CRC-I/microbiota/SraRunTable.csv')

# Quick look at how many runs there are per timepoint value.
table(sra.meta$timepoint)

# Keep only the per-run covariates of interest and save that subset to disk;
# `sra.meta` is overwritten with the reduced table.
sra.meta <- select(
  sra.meta,
  Run, AGE, BMI, Cohort, gender, patientid, timepoint
)
write_csv(sra.meta, 'CRC-I/microbiota/mars20.sra.meta.csv')

# Interactive browsing of the reduced table.
DT::datatable(sra.meta)

# Sample sheet read from nxf_input.csv.
ebi.meta <- read_delim("~/append-ssd/alaria2/mars20ibs/nxf_input.csv")

# Write a copy of the sample sheet without the `readgroup` column and with
# the first "_<char>" occurrence stripped from each sample name.
# `ebi.meta` itself is left untouched.
# NOTE(review): '_.' removes the first underscore plus ANY following character,
# wherever it occurs -- confirm sample names only carry a trailing "_<digit>".
write_csv(
  ebi.meta |>
    mutate(readgroup = NULL) |>
    mutate(sample = str_remove(sample, '_.')),
  '/home/supervisor/mist2/gjsx/mars20ibs/nxf_input.mars20.csv'
)

## 2 batches ------
# Batch 1 is the row-wise concatenation of the two sample lists below.
rnaseq.b1 <- read_csv(
  c(
    '~/mist2/mars20ibs/rnaseq.head10.csv',
    '~/mist2/mars20ibs/rnaseq.tail10.csv'
  )
)

# Number of batch-1 rows per sample name after stripping the first "_<char>".
count(rnaseq.b1, readgroup = str_remove(sample, '_.'))

# Rows of the full sample sheet whose `sample` is not in batch 1.
filter(ebi.meta, !sample %in% rnaseq.b1$sample)

# vcf parsing ------
# Read the VCF as a table: '##' meta lines are skipped, the '#CHROM' header
# line supplies the column names. Columns 10 onwards are reshaped to long form
# (`name` = original column name, `value` = that column's entry).
mars68 <- read_delim('mars68.i2t.vcf', comment = '##') |>
  pivot_longer(10:last_col())

# Classify each sample's call and export a two-column (name, genotype) table.
#   - missing / no-call ('.', './.', '.|.') -> NA  (previously miscoded as 'IT')
#   - homozygous reference, unphased '0/0' or phased '0|0' -> 'II'
#   - everything else -> 'IT'
# NOTE(review): homozygous-alternate calls (1/1) are still reported as 'IT',
# as in the original two-level coding -- confirm no such calls are present.
mars68 |>
  mutate(
    genotype = case_when(
      is.na(value) | str_detect(value, '^\\.') ~ NA_character_,
      str_detect(value, '^0[/|]0') ~ 'II',
      TRUE ~ 'IT'
    )
  ) |>
  select(name, genotype) |>
  write_csv('CRC-I/microbiota/mars20.i2t.meta.csv')

# Cohort composition per genotype ------
# Supplementary sample annotation; `Study ID` and `Cohort` are used below.
group.meta <- read_delim('CRC-I/microbiota/supp.meta.tsv')

# Genotype table with columns `name` and `genotype`.
geno.meta <- read_csv('CRC-I/microbiota/mars20.i2t.meta.csv')

group.meta |>
  # Build the join key by prefixing the study ID with "X".
  # NOTE(review): presumably this mirrors the sample names in the genotype
  # table -- confirm.
  mutate(name = str_c('X', `Study ID`)) |>
  # Keep every genotyped sample; samples without annotation get NA Cohort.
  # The key is spelled out so the join cannot silently pick up other shared
  # columns.
  right_join(geno.meta, by = 'name') |>
  ggplot(aes(y = genotype, fill = Cohort)) +
  # position = 'fill' rescales each genotype bar to proportions (0-1).
  geom_bar(position = 'fill') +
  # NOTE(review): colours and labels are given positionally -- confirm the
  # Cohort levels are in the order HC, IBS-C, IBS-D.
  scale_fill_manual(
    values = c('green3', 'red3', 'orange3'),
    labels = c('HC', 'IBS-Constipation', 'IBS-Diarrhea')
  ) +
  # theme_pubr() lives in ggpubr, which is not attached by library(tidyverse);
  # call it through its namespace so the plot does not fail.
  ggpubr::theme_pubr() +
  labs(
    title = "FCGR2B-I232T genotype in IBS patients",
    x = 'Fraction',
    subtitle = 'GSE146853 (n=42)'
  )
